{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# tsfresh提取特征 (feature extraction with tsfresh)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tsfresh import select_features\n",
    "from tsfresh import extract_features\n",
    "# from tsfresh.feature_extraction import EfficientFCParameters\n",
    "# from tsfresh.feature_extraction import ComprehensiveFCParameters\n",
    "from joblib import Parallel, delayed\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature calculators to run, listed in the order they are registered.\n",
    "# None of them takes arguments, so every entry maps to None\n",
    "# (dict.fromkeys fills in None for each key).\n",
    "minimal_parameters = dict.fromkeys([\n",
    "    'length',\n",
    "    'abs_energy',\n",
    "    'mean',\n",
    "    'mean_abs_change',\n",
    "    'mean_change',\n",
    "    'variance',\n",
    "    'skewness',\n",
    "    'kurtosis',\n",
    "])\n",
    "\n",
    "# Larger alternative feature set, currently disabled and kept for reference.\n",
    "# fc_parameters = {\n",
    "#     'length': None,\n",
    "#     'abs_energy': None,\n",
    "\n",
    "#      'median': None,\n",
    "#      'mean': None,\n",
    "#      'standard_deviation': None,\n",
    "#      'variance': None,\n",
    "#      'skewness': None,\n",
    "#      'kurtosis': None,\n",
    "\n",
    "#      'sample_entropy': None,\n",
    "\n",
    "#      'ar_coefficient': [{'coeff': 0, 'k': 10},\n",
    "#                           {'coeff': 1, 'k': 10},\n",
    "#                           {'coeff': 2, 'k': 10},\n",
    "#                           {'coeff': 3, 'k': 10},\n",
    "#                           {'coeff': 4, 'k': 10}],\n",
    "\n",
    "#     'fft_aggregated': [{'aggtype': 'centroid'},\n",
    "#                           {'aggtype': 'variance'},\n",
    "#                           {'aggtype': 'skew'},\n",
    "#                           {'aggtype': 'kurtosis'}],\n",
    "\n",
    "#     'agg_autocorrelation': [{'f_agg': 'mean'},\n",
    "#                             {'f_agg': 'median'},\n",
    "#                             {'f_agg': 'var'}],\n",
    "\n",
    "#     'number_cwt_peaks': [{'n': 1}, {'n': 5}],\n",
    "#     'binned_entropy': [{'max_bins': 10}],\n",
    "#     'approximate_entropy': [{'m': 2, 'r': 0.1},\n",
    "#     {'m': 2, 'r': 0.3},\n",
    "#     {'m': 2, 'r': 0.5},\n",
    "#     {'m': 2, 'r': 0.7},\n",
    "#     {'m': 2, 'r': 0.9}],\n",
    "#     'linear_trend': [{'attr': 'pvalue'},\n",
    "#     {'attr': 'rvalue'},\n",
    "#     {'attr': 'intercept'},\n",
    "#     {'attr': 'slope'},\n",
    "#     {'attr': 'stderr'}],\n",
    "# }\n",
    "\n",
    "# The feature set actually passed to the extraction calls.\n",
    "settings = minimal_parameters\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_tsfresh_feature_basis(sensor, settings, n_jobs=48):\n",
    "    '''\n",
    "    Extract the tsfresh features named in `settings` from one sensor table.\n",
    "\n",
    "    Parameters:\n",
    "        sensor: time-series container handed straight to tsfresh's\n",
    "            extract_features. It is read with column_id='id' and\n",
    "            column_sort='sort_col', so it must provide both columns.\n",
    "        settings (dict): feature calculators to compute, passed to tsfresh\n",
    "            as default_fc_parameters.\n",
    "        n_jobs (int): number of processes tsfresh may use. Defaults to 48,\n",
    "            the value that was previously hard-coded here.\n",
    "\n",
    "    Returns:\n",
    "        The object returned by extract_features (the extracted features).\n",
    "    '''\n",
    "    # NOTE: if this function is itself run from several worker processes at\n",
    "    # once, the total process count is (workers * n_jobs); pass a smaller\n",
    "    # n_jobs in that case to avoid oversubscribing the machine.\n",
    "    extracted_features = extract_features(\n",
    "        sensor,\n",
    "        column_id='id',\n",
    "        column_sort='sort_col',\n",
    "        n_jobs=n_jobs,\n",
    "        default_fc_parameters=settings,\n",
    "        disable_progressbar=False,\n",
    "    )\n",
    "    return extracted_features\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_tsfresh_feature_parallel(data_no, csv_nos, settings, n_jobs=10):\n",
    "    '''\n",
    "    Extract tsfresh features for every sensor file of one PLC, in parallel.\n",
    "\n",
    "    For i = 1..csv_nos, loads ./sensors_ad/<data_no>/<i>.lz4, runs\n",
    "    gen_tsfresh_feature_basis on it and dumps the result (lz4-compressed,\n",
    "    via joblib) to ./sensors_tsfresh_minimal/<data_no>/<i>.lz4.\n",
    "\n",
    "    Parameters:\n",
    "        data_no: which PLC to process; used as the sub-directory name.\n",
    "        csv_nos (int): number of sensor files belonging to that PLC\n",
    "            (files are numbered starting at 1).\n",
    "        settings (dict): features to extract, forwarded unchanged to\n",
    "            gen_tsfresh_feature_basis.\n",
    "        n_jobs (int): number of joblib workers processing files\n",
    "            concurrently. Defaults to 10, the previously hard-coded value.\n",
    "\n",
    "    Returns:\n",
    "        None. Results are only written to disk.\n",
    "    '''\n",
    "\n",
    "    input_dir = './sensors_ad/%s/' % data_no\n",
    "    output_dir = './sensors_tsfresh_minimal/%s/' % data_no\n",
    "\n",
    "    # Creates missing parent directories too, and does not fail when the\n",
    "    # directory already exists (unlike an exists()/mkdir() pair, which can\n",
    "    # race with another process creating the same directory).\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    def basis_func(idx):\n",
    "        file_name = '%d.lz4' % idx\n",
    "        # joblib.load unpickles the file: only use it on trusted inputs.\n",
    "        sensor = joblib.load(os.path.join(input_dir, file_name))\n",
    "        features = gen_tsfresh_feature_basis(sensor, settings)\n",
    "        joblib.dump(features, os.path.join(output_dir, file_name), compress='lz4')\n",
    "\n",
    "    Parallel(n_jobs=n_jobs, verbose=1)(\n",
    "        delayed(basis_func)(i) for i in range(1, csv_nos + 1)\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
      "[Parallel(n_jobs=10)]: Done  48 out of  48 | elapsed:  2.4min finished\n"
     ]
    }
   ],
   "source": [
    "# Extract features for PLC '01', which has 48 sensor files.\n",
    "gen_tsfresh_feature_parallel('01', 48, settings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
      "[Parallel(n_jobs=10)]: Done  48 out of  48 | elapsed:  2.4min finished\n"
     ]
    }
   ],
   "source": [
    "# Extract features for PLC '02', which has 48 sensor files.\n",
    "gen_tsfresh_feature_parallel('02', 48 , settings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
      "[Parallel(n_jobs=10)]: Done  37 out of  37 | elapsed:  1.9min finished\n"
     ]
    }
   ],
   "source": [
    "# Extract features for PLC '03', which has 37 sensor files.\n",
    "gen_tsfresh_feature_parallel('03', 37, settings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import joblib\n",
    "# test = joblib.load('./sensors_tsfresh_minimal/01/1.lz4')\n",
    "# test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
